BRIC Project
# clear workspace
##rm(list=ls())
getting the current WD
getwd()
# loading libraries
library(data.table) # extension of the data.frame package. It is widely used for fast aggregation of large datasets, low latency add/update/remove of columns, quicker ordered joins, and a fast file reader.
library(dplyr) # data manipulation package
library(lubridate)
library(zoo) # methods for totally ordered indexed observations. It aims at performing calculations containing irregular time series of numeric vectors, matrices & factors
library(stats)
library(utils)
library(tidyverse)
library(readr)
library(ggplot2)
library(reshape2)
Data column descriptions (Worldscope): https://www.professors.wi.tum.de/fileadmin/w00bca/fm/Worldscope_Data_Definition_Guide_Issue_15.pdf
https://docs.google.com/spreadsheets/d/1YtuJiv60Q6nKIaFJLQY60sGQErbsl_8nvPdUHmvO8vM/edit?usp=sharing
memory.limit(9999999999)
[1] 1e+10
# loading R.data BRIC monthly
load("C:/Users/johan/Documents/BRIC_Asset_Pricing_Project/BRIC_monthly.RData")
# loading R.data BRIC static
load("C:/Users/johan/Documents/BRIC_Asset_Pricing_Project/BRIC_static.RData")
strings not representable in native encoding will be translated to UTF-8input string 'NIZHNY NOVGOROD AIRCRAFT BUILDING PLANT愼㸰 OJSC' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'CHINA愼㸰DIGITAL愼㸰TV愼㸰HOLDING' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'CRAZY愼㸰INFOTECH愼㸰' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'KILITCH愼㸰DRUGS愼㸰(INDIA)愼㸰' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'SHIRPUR愼㸰GOLD愼㸰REFINERY' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'WINSOME TEXTILE愼㸰' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'PLETHICO PHARMACEUTICALS愼㸰' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'RAP MEDIA愼㸰' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'RICOH愼㸰INDIA' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'MTZ愼㸰POLYFILMS' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'BASANT AGRO TECH (INDIA)愼㸰' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'NAGPUR POWER & INDS 愼㸰' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'PAREKH愼㸰ALUMINEX' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'OMNITECH愼㸰INFOSOLUTIONS' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'MAYUR愼㸰LEATHER愼㸰PRODUCTS' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'SPARSH愼㸰BPO SVERVICES LTD' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string 'CAMLIN愼㸰FINE愼㸰CHEMICALS愼㸰 LIMITED' cannot be translated to UTF-8, is it valid in 'UTF-8' ?input string '愼㸰TOMSK TNSM.GRIDS' cannot be translated to UTF-8, is it valid in 'UTF-8' ?
# loading R.data BRIC yearly
load("C:/Users/johan/Documents/BRIC_Asset_Pricing_Project/BRIC_yearly.RData")
# loading R.data BRIC monthly
load("/Users/Manu/Desktop/TUM_Master_Mgt_Technology/TUM_SS_21/Empirical Asset Pricing seminar/Seminar Thesis/BRIC_data/BRIC_monthly.RData")
# loading R.data BRIC monthly
load("/Users/Manu/Desktop/TUM_Master_Mgt_Technology/TUM_SS_21/Empirical Asset Pricing seminar/Seminar Thesis/BRIC_data/BRIC_static.RData")
# loading R.data BRIC monthly
load("/Users/Manu/Desktop/TUM_Master_Mgt_Technology/TUM_SS_21/Empirical Asset Pricing seminar/Seminar Thesis/BRIC_data/BRIC_yearly.RData")
# loading R.data BRIC monthly
load("/Users/vinitkumar/Desktop/SS2021/Cases in Finance/BRIC_Asset_Pricing_Project/BRIC_Data/BRIC_monthly.RData")
# loading R.data BRIC monthly
load("/Users/vinitkumar/Desktop/SS2021/Cases in Finance/BRIC_Asset_Pricing_Project/BRIC_Data/BRIC_static.RData")
# loading R.data BRIC monthly
load("/Users/vinitkumar/Desktop/SS2021/Cases in Finance/BRIC_Asset_Pricing_Project/BRIC_Data/BRIC_yearly.RData")
# loading R.data BRIC monthly
load("/Users/cg/Desktop/TUM/SS21/Empirical Asset Pricing Seminar/Presentation/BRIC_Data/BRIC_monthly.RData")
# loading R.data BRIC yearly
load("/Users/cg/Desktop/TUM/SS21/Empirical Asset Pricing Seminar/Presentation/BRIC_Data/BRIC_yearly.RData")
# loading R.data BRIC static
load("/Users/cg/Desktop/TUM/SS21/Empirical Asset Pricing Seminar/Presentation/BRIC_Data/BRIC_static.RData")
#0 save raw data
BRIC.monthly.raw <- BRIC.monthly
BRIC.yearly.raw <- BRIC.yearly
#1 adjust date range (from July 1994 to December 2018)
#1.1 leave raw data with date adjustment
BRIC.monthly.withNA <- subset(BRIC.monthly.raw, Date >= "1994-06-30" & Date <= "2019-02-01")
BRIC.yearly.withNA <- subset(BRIC.yearly.raw, YEAR >= "1994" & YEAR <= "2019")
#1.2 working dataframe
BRIC.monthly <- subset(BRIC.monthly, Date >= "1994-06-30" & Date <= "2019-02-01")
BRIC.yearly <- subset(BRIC.yearly, YEAR >= "1994" & YEAR <= "2019")
at this point of time the data sheet is still balanced, therefore we can use the shift function
# add a new column for the lagged MV.USD (this is the MV.USD from the previous month)
BRIC.monthly[, LMV.USD := lag((MV.USD),1), by =Id]
# add a new column for volatility
BRIC.monthly[, volatility := lag(rollapplyr(RET.USD, 36, sd, fill = NA, partial = 12),1), by =Id]
# add 12 new columns for last 12 months returns
BRIC.monthly[, lag1 := lag((RET.USD),1), by =Id]
BRIC.monthly[, lag2 := lag((RET.USD),2), by =Id]
BRIC.monthly[, lag3 := lag((RET.USD),3), by =Id]
BRIC.monthly[, lag4 := lag((RET.USD),4), by =Id]
BRIC.monthly[, lag5 := lag((RET.USD),5), by =Id]
BRIC.monthly[, lag6 := lag((RET.USD),6), by =Id]
BRIC.monthly[, lag7 := lag((RET.USD),7), by =Id]
BRIC.monthly[, lag8 := lag((RET.USD),8), by =Id]
BRIC.monthly[, lag9 := lag((RET.USD),9), by =Id]
BRIC.monthly[, lag10 := lag((RET.USD),10), by =Id]
BRIC.monthly[, lag11 := lag((RET.USD),11), by =Id]
BRIC.monthly[, lag12 := lag((RET.USD),12), by =Id]
typeof(BRIC.monthly$lag1)
[1] "double"
# negative values are replaced to 0
#ifelse(BRIC.monthly$lag1<=0,0,ifelse(BRIC.monthly$lag1>0,1,NA))
BRIC.monthly$lag1 <- replace(BRIC.monthly$lag1,BRIC.monthly$lag1<=0,0)
BRIC.monthly$lag2 <- replace(BRIC.monthly$lag2,BRIC.monthly$lag2<=0,0)
BRIC.monthly$lag3 <- replace(BRIC.monthly$lag3,BRIC.monthly$lag3<=0,0)
BRIC.monthly$lag4 <- replace(BRIC.monthly$lag4,BRIC.monthly$lag4<=0,0)
BRIC.monthly$lag5 <- replace(BRIC.monthly$lag5,BRIC.monthly$lag5<=0,0)
BRIC.monthly$lag6 <- replace(BRIC.monthly$lag6,BRIC.monthly$lag6<=0,0)
BRIC.monthly$lag7 <- replace(BRIC.monthly$lag7,BRIC.monthly$lag7<=0,0)
BRIC.monthly$lag8 <- replace(BRIC.monthly$lag8,BRIC.monthly$lag8<=0,0)
BRIC.monthly$lag9 <- replace(BRIC.monthly$lag9,BRIC.monthly$lag9<=0,0)
BRIC.monthly$lag10 <- replace(BRIC.monthly$lag10,BRIC.monthly$lag10<=0,0)
BRIC.monthly$lag11 <- replace(BRIC.monthly$lag11,BRIC.monthly$lag11<=0,0)
BRIC.monthly$lag12 <- replace(BRIC.monthly$lag12,BRIC.monthly$lag12<=0,0)
# positive values are replaced to 1
BRIC.monthly$lag1 <- replace(BRIC.monthly$lag1,BRIC.monthly$lag1>0,1)
BRIC.monthly$lag2 <- replace(BRIC.monthly$lag2,BRIC.monthly$lag2>0,1)
BRIC.monthly$lag3 <- replace(BRIC.monthly$lag3,BRIC.monthly$lag3>0,1)
BRIC.monthly$lag4 <- replace(BRIC.monthly$lag4,BRIC.monthly$lag4>0,1)
BRIC.monthly$lag5 <- replace(BRIC.monthly$lag5,BRIC.monthly$lag5>0,1)
BRIC.monthly$lag6 <- replace(BRIC.monthly$lag6,BRIC.monthly$lag6>0,1)
BRIC.monthly$lag7 <- replace(BRIC.monthly$lag7,BRIC.monthly$lag7>0,1)
BRIC.monthly$lag8 <- replace(BRIC.monthly$lag8,BRIC.monthly$lag8>0,1)
BRIC.monthly$lag9 <- replace(BRIC.monthly$lag9,BRIC.monthly$lag9>0,1)
BRIC.monthly$lag10 <- replace(BRIC.monthly$lag10,BRIC.monthly$lag10>0,1)
BRIC.monthly$lag11 <- replace(BRIC.monthly$lag11,BRIC.monthly$lag11>0,1)
BRIC.monthly$lag12 <- replace(BRIC.monthly$lag12,BRIC.monthly$lag12>0,1)
BRIC.monthly$lagSum <- BRIC.monthly$lag1 + BRIC.monthly$lag2 + BRIC.monthly$lag3 + BRIC.monthly$lag4 + BRIC.monthly$lag5 + BRIC.monthly$lag6 + BRIC.monthly$lag7 + BRIC.monthly$lag8 + BRIC.monthly$lag9 + BRIC.monthly$lag10 + BRIC.monthly$lag11 + + BRIC.monthly$lag12
BRIC.monthly[ , pf.momentum := ifelse(lagSum>=8,"Winner",ifelse(lagSum < 8,"Looser",NA))]
# delete all entrys before July 1994
BRIC.monthly <- subset(BRIC.monthly, Date >= "1994-07-29" & Date <= "2019-02-01")
# calculate MV.USD.June for every column
# help columns
BRIC.monthly[,month := month(Date)]
BRIC.monthly[,year := year(Date)]
BRIC.monthly[,hcjun := ifelse(month>=7,year,year-1)]
# MV.USD.June column
intermediate <- filter(BRIC.monthly,BRIC.monthly$month == 7)
# minimise for join
intermediate <- subset(intermediate, select = c("Id","LMV.USD","hcjun"))
colnames(intermediate)[2] <- "MV.USD.June"
BRIC.monthly <- subset(BRIC.monthly, select = c("Id","country","Date","MV","MV.USD","LMV.USD","RET","RET.USD","volatility","pf.momentum","ym","hcjun","year","month"))
# inner join automatically deletes values where MV.USD.June is not available
BRIC.monthly <- inner_join(x = BRIC.monthly,y = intermediate, by = c("Id","hcjun"))
# order BRIC.monthly
BRIC.monthly <- subset(BRIC.monthly, select = c("Id","country","Date","MV","MV.USD","LMV.USD","MV.USD.June","RET","RET.USD","volatility","pf.momentum","ym","hcjun","year","month"))
# scale
BRIC.monthly$MV <- BRIC.monthly$MV * 1000000
BRIC.monthly$MV.USD <- BRIC.monthly$MV.USD * 1000000
# delete na's, set 0 and calculate important values
# no RET.USD data, no MV or no LMV.USD
BRIC.monthly <- BRIC.monthly %>%
drop_na(RET.USD,MV,MV.USD,LMV.USD,MV.USD.June,volatility,pf.momentum)
#3.0 the BRIC.yearly panel here is still balanced! We need to add a column for total assets the year before ----
#add the lagged WC03501 (shifted by Id) (this is the WC03501 from the previous month)
BRIC.yearly[, lag.value:=c(0, WC03501[-.N]), by=Id]
# rename column to LMV.USD
colnames(BRIC.yearly)[106] <- "TotalAssetsBefore"
#3.1 book value / equity and related ----
# delete rows with no WC03501 (Common equity)
BRIC.yearly <- BRIC.yearly %>%
drop_na(WC03501)
# set WC03263 (deferred taxes) zero if NA
BRIC.yearly$WC03263 <- BRIC.yearly$WC03263 %>% replace_na(0)
# add column for BookValue (Hanauer 2020 calculation)
BRIC.yearly$BookValue <- BRIC.yearly$WC03501 + BRIC.yearly$WC03263
# delete rows with negative BookValue
BRIC.yearly <- BRIC.yearly[BRIC.yearly$BookValue >= 0,]
#3.2 total assets ----
# delete rows with no WC02999 (total assets)
BRIC.yearly <- BRIC.yearly %>%
drop_na(WC02999)
# delete rows with no TotalAssetsBefore
BRIC.yearly <- BRIC.yearly %>%
drop_na(TotalAssetsBefore)
#3.3 operating profits ----
# "To have a valid value, at least one of cost components cost of goods sold, selling, general and administrative expenses, or interest expense must be non-missing." (Hanauer, 2019, p. 284) --> if one of these values is missing, we must delete these rows WC01001,WC01051,WC01101,WC01251
# delete rows where ALL 4 columns are NA
BRIC.yearly <- filter(BRIC.yearly,!is.na(WC01001) | !is.na(WC01051) | !is.na(WC01101) | !is.na(WC01251))
# replace all na's in this 4 columns with 0
BRIC.yearly$WC01001 <- BRIC.yearly$WC01001 %>% replace_na(0)
BRIC.yearly$WC01051 <- BRIC.yearly$WC01051 %>% replace_na(0)
BRIC.yearly$WC01101 <- BRIC.yearly$WC01101 %>% replace_na(0)
BRIC.yearly$WC01251 <- BRIC.yearly$WC01251 %>% replace_na(0)
# calculate operating profits (Hanauer, 2019, p.284)
BRIC.yearly$OperatingProfits <- (BRIC.yearly$WC01001 - BRIC.yearly$WC01051 - BRIC.yearly$WC01101 - BRIC.yearly$WC01251)
# 3.4 minimise data frame ----
# for BRIC.yearly we keep: Id, country, ICBSUC, YEAR, BookValue, OperatingProfits and total assets
# Note: WC07201 is not used, as our MV should be the MV from the monthly data for 06.y !
BRIC.yearly <- subset(BRIC.yearly, select = c("Id","country","ICBSUC","YEAR","BookValue","OperatingProfits","WC02999","TotalAssetsBefore"))
# we rename WC02999 to total assets
colnames(BRIC.yearly)[7] <- "TotalAssets"
# 3.5 create a help column hcdec (1 year lag) ----
BRIC.yearly$hcdec <- BRIC.yearly$YEAR + 1
# scale
BRIC.yearly$BookValue <- BRIC.yearly$BookValue *1000
BRIC.yearly$OperatingProfits <- BRIC.yearly$OperatingProfits *1000
BRIC.yearly$WC02999 <- BRIC.yearly$WC02999 *1000
BRIC.yearly$TotalAssetsBefore <- BRIC.yearly$TotalAssetsBefore *1000
# load data sheet from French's website
FFData <- read_csv("FF_Research_Data_5_Factors_2x3.CSV",
skip = 2)
# shorting data frame
one_m_tbill <- as.data.frame(FFData[c("X1","RF")][1:693,])
# adding a ym column to risk free rate data
one_m_tbill$ym<-as.yearmon(one_m_tbill$X1, "%Y %m")
#delete X1 column
one_m_tbill <- subset(one_m_tbill,select = c("ym","RF"))
# merge risk-free rate (1 month treasury bill rate) with monthly data
BRIC.monthly <- left_join(x = BRIC.monthly, y = one_m_tbill, by = "ym")
# make rf column numeric
BRIC.monthly$RF <- as.numeric(BRIC.monthly$RF)
# Add RiRF ----
BRIC.monthly$RiRF <- BRIC.monthly$RET.USD - BRIC.monthly$RF
# Local currency RiRF
BRIC.monthly$RiRF.local <- BRIC.monthly$RET - BRIC.monthly$RF
NOTE: Here we “loose” around 600 000 rows!
BRIC.maindata <- inner_join(x = BRIC.monthly,y = BRIC.yearly, by = c("Id","hcjun" = "hcdec"))
# Add a B/M column
BRIC.maindata$BM <- BRIC.maindata$BookValue / BRIC.maindata$MV
# Add a OP/BE column
BRIC.maindata$OPBE <- BRIC.maindata$OperatingProfits / BRIC.maindata$BookValue
# Add a AssetChange column
BRIC.maindata$AssetChange <- ((BRIC.maindata$TotalAssets - BRIC.maindata$TotalAssetsBefore)/BRIC.maindata$TotalAssetsBefore)
BRIC.maindata <- subset(BRIC.maindata, hcjun >= "1996")
If we want to calculate factors or anything else for countries standalone we will have to filter them here! For multiple trys run 4.2 again, to reset BRIC.maindata
# e.g. filter for China
##BRIC.maindata <- filter(BRIC.maindata,BRIC.maindata$country.x == "CHN")
setorder(BRIC.maindata,Date,-MV.USD.June)
hlpvariable <- BRIC.maindata[month==7 & !is.na(MV.USD.June),
.(pf.size = ifelse((cumsum(MV.USD.June)/sum(MV.USD.June))>=0.9,"Small","Big"),Id),
by=year]
# Merge the size portfolio allocation back from July Y to June Y+1
BRIC.maindata <- merge(BRIC.maindata,hlpvariable,
by.x=c("hcjun","Id"),
by.y=c("year","Id"),
all.x=T)
# delete NA's (only about 1000 rows)
BRIC.maindata <- na.omit(BRIC.maindata,cols = "pf.size")
RiRF used for benchmark in p.50 lecture slides For annualisation we used arithmetic average * 12
# create copy
BRIC.benchmark <- BRIC.maindata
# filter on pf.size
BRIC.benchmark <- subset(BRIC.benchmark, pf.size == "Big" )
# value weight returns
## calculate monthly market value over all stocks
BRIC.benchmark.valueWeights <- aggregate(LMV.USD ~ ym, data = BRIC.benchmark, FUN = sum)
colnames(BRIC.benchmark.valueWeights)[2] <- "TotalValue"
## join value weights to stocks
BRIC.benchmark <- inner_join(x = BRIC.benchmark, y = BRIC.benchmark.valueWeights, by = "ym")
## calculate value weight
BRIC.benchmark$ValueWeight <- BRIC.benchmark$LMV.USD/BRIC.benchmark$TotalValue
## calculate value weight excess return / return
BRIC.benchmark$wRet <- BRIC.benchmark$ValueWeight*BRIC.benchmark$RET.USD
BRIC.benchmark$wExRet <- BRIC.benchmark$ValueWeight * BRIC.benchmark$RiRF
# per month
benchmark.retBricM <- aggregate(cbind(wRet,wExRet) ~ ym + hcjun, data = BRIC.benchmark,FUN = sum)
# per year
benchmark.retBricY <- aggregate(cbind(wRet,wExRet) ~ hcjun, data = benchmark.retBricM,FUN = mean)
## annualise
benchmark.retBricY$wRet <- benchmark.retBricY$wRet*12
benchmark.retBricY$wExRet <- benchmark.retBricY$wExRet*12
# per country
benchmark.retCountryM <- aggregate(cbind(wRet,wExRet) ~ country.x + ym + hcjun, data = BRIC.benchmark,FUN = sum)
# per year
benchmark.retCountryY <- aggregate(cbind(wRet,wExRet) ~ country.x + hcjun, data = benchmark.retCountryM,FUN = mean)
## annualise
benchmark.retCountryY$wRet <- benchmark.retCountryY$wRet*12
benchmark.retCountryY$wExRet <- benchmark.retCountryY$wExRet*12
# single dataframe
benchmark_BRIC <- inner_join(benchmark.retBricY,benchmark.retBricM,by = "hcjun")
benchmark_Country <- inner_join(benchmark.retCountryY,benchmark.retCountryM,by = c("hcjun","country.x"))
# rename columns
colnames(benchmark_BRIC)[2] <- "YearlyReturn"
colnames(benchmark_BRIC)[3] <- "YearlyExcessReturn"
colnames(benchmark_BRIC)[5] <- "MonthlyReturn"
colnames(benchmark_BRIC)[6] <- "MonthlyExcessReturn"
colnames(benchmark_Country)[3] <- "YearlyReturn"
colnames(benchmark_Country)[4] <- "YearlyExcessReturn"
colnames(benchmark_Country)[6] <- "MonthlyReturn"
colnames(benchmark_Country)[7] <- "MonthlyExcessReturn"
# create copy
BRIC.market <- BRIC.maindata
# value weight returns
## calculate monthly market value over all stocks
BRIC.market.valueWeights <- aggregate(LMV.USD ~ ym, data = BRIC.market, FUN = sum)
colnames(BRIC.market.valueWeights)[2] <- "TotalValue"
## join value weights to stocks
BRIC.market <- inner_join(x = BRIC.market, y = BRIC.market.valueWeights, by = "ym")
## calculate value weight
BRIC.market$ValueWeight <- BRIC.market$LMV.USD/BRIC.market$TotalValue
## calculate value weight excess return / return
BRIC.market$wRet <- BRIC.market$ValueWeight*BRIC.market$RET.USD
BRIC.market$wExRet <- BRIC.market$ValueWeight * BRIC.market$RiRF
# per month
market.retBricM <- aggregate(cbind(wRet,wExRet) ~ ym + hcjun, data = BRIC.market,FUN = sum)
# per year
market.retBricY <- aggregate(cbind(wRet,wExRet) ~ hcjun, data = market.retBricM,FUN = mean)
## annualise
market.retBricY$wRet <- market.retBricY$wRet*12
market.retBricY$wExRet <- market.retBricY$wExRet*12
# per country
market.retCountryM <- aggregate(cbind(wRet,wExRet) ~ country.x + ym + hcjun, data = BRIC.market,FUN = sum)
# per year
market.retCountryY <- aggregate(cbind(wRet,wExRet) ~ country.x + hcjun, data = market.retCountryM,FUN = mean)
## annualise
market.retCountryY$wRet <- market.retCountryY$wRet*12
market.retCountryY$wExRet <- market.retCountryY$wExRet*12
# single dataframe
market_BRIC <- inner_join(market.retBricY,market.retBricM,by = "hcjun")
market_Country <- inner_join(market.retCountryY,market.retCountryM,by = c("hcjun","country.x"))
# rename columns
colnames(market_BRIC)[2] <- "YearlyReturn"
colnames(market_BRIC)[3] <- "YearlyExcessReturn"
colnames(market_BRIC)[5] <- "MonthlyReturn"
colnames(market_BRIC)[6] <- "MonthlyExcessReturn"
colnames(market_Country)[3] <- "YearlyReturn"
colnames(market_Country)[4] <- "YearlyExcessReturn"
colnames(market_Country)[6] <- "MonthlyReturn"
colnames(market_Country)[7] <- "MonthlyExcessReturn"
Breakpoints for RMW and CMA are based on Jiao(2017) and FF(2015) 2x3 sort = NYSE Percentiles (FF(2015) p.6) [PortfolioSorts.] (“Images/PortfolioSorts.jpg”) ## 4.6 - 4.8 Determine other breakpoints
# 4.6 HML: Determine B/M breakpoints ----
# Breakpoints for RMW and CMA are based on Jiao(2017) and FF(2015) 2x3 sort = NYSE Percentiles (FF(2015) p.6)
#![PortfolioSorts.] ("Images/PortfolioSorts.jpg")
# Determine the B/M breakpoints based on big stocks only
hlpvariable2 <- BRIC.maindata[month==7 & !is.na(BM) & pf.size=="Big", .(bm_bb30 = quantile(BM , probs = c(0.3), na.rm=T),
bm_bb70 = quantile(BM , probs = c(0.7), na.rm=T)),by=year]
# Merge the B/M portfolio allocation back from July Y to June Y+1
BRIC.maindata <- merge(BRIC.maindata,hlpvariable2,
by.x=c("hcjun"),
by.y=c("year"),
all.x=T)
BRIC.maindata[ , pf.bm := ifelse(BM>bm_bb70,"High",ifelse((BM<=bm_bb70 & BM>bm_bb30),"Neutral",ifelse(BM<=bm_bb30,"Low",NA)))]
BRIC.maindata[, SIZE_VALUE := paste0(pf.size,".",pf.bm)]
# 4.7 RMW: Determine OP/BE breakpoints ----
# Determine the OP/BE breakpoints based on big stocks only
hlpvariable2 <- BRIC.maindata[month==7 & !is.na(OPBE) & pf.size=="Big", .(opbe_bb30 = quantile(OPBE , probs = c(0.3), na.rm=T),
opbe_bb70 = quantile(OPBE , probs = c(0.7), na.rm=T)),by=year]
# Merge the OP/BE portfolio allocation back from July Y to June Y+1
BRIC.maindata <- merge(BRIC.maindata,hlpvariable2,
by.x=c("hcjun"),
by.y=c("year"),
all.x=T)
# 3 OP/BE brackets: Robust, Neutral and Weak
BRIC.maindata[ , pf.opbe := ifelse(OPBE>opbe_bb70,"Robust",ifelse((OPBE<=opbe_bb70 & OPBE>opbe_bb30),"Neutral",ifelse(OPBE<=opbe_bb30,"Weak",NA)))]
BRIC.maindata[, SIZE_PROFITABILITY := paste0(pf.size,".",pf.opbe)]
# 4.8 CMA: Determine Asset Change Breakpoints ----
#investment: As in Cooper et al. (2008), we measure asset growth in June of year y as the percentage change in total assets (WC02999) from fiscal year ending in calendar year y−2 to fiscal year ending in calendar year y−1.
# Determine the AC breakpoints based on big stocks only
hlpvariable2 <- BRIC.maindata[month==7 & !is.na(AssetChange) & pf.size=="Big", .(ac_bb30 = quantile(AssetChange , probs = c(0.3), na.rm=T),
ac_bb70 = quantile(AssetChange , probs = c(0.7), na.rm=T)),by=year]
# Merge the AC portfolio allocation back from July Y to June Y+1
BRIC.maindata <- merge(BRIC.maindata,hlpvariable2,
by.x=c("hcjun"),
by.y=c("year"),
all.x=T)
# 3 investment brackets: Aggressive, neutral and conservative
BRIC.maindata[ , pf.ac := ifelse(AssetChange>ac_bb70,"Aggressive",ifelse((AssetChange<=ac_bb70 & AssetChange>ac_bb30),"Neutral",ifelse(AssetChange<=ac_bb30,"Conservative",NA)))]
BRIC.maindata[, SIZE_INVESTMENT := paste0(pf.size,".",pf.ac)]
https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/Data_Library/f-f_5_factors_2x3.html
##4.9 Calculate Factors
summary(BRIC.maindata$BM)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000 0.2215 0.4452 1.5640 1.0005 724.0493
ISIN: International Security Identification Number (stock identifier) ESTAT: active vs inactive company (publicly listed or not) Id: join column with BRIC.yearly dataframe INDM: industry sector code
GEOGN: geographic group name GEOLN: geographic location
List of Database codes: https://www.bwl.uni-mannheim.de/media/Lehrstuehle/bwl/Maug/Database_info/Datastream_dataypes.pdf
ID: ?? Country: 4 BRIC COUNTRY CODES ICBSUC: industrial classification benchmark https://link.springer.com/content/pdf/bbm%3A978-3-8350-9531-1%2F1.pdf
WC07021: SIC(standard industrial classification) primary code from Worldscope W05651: Common Shares Traded - Annual (Security)
TO DO’s:
Project Dates: Thesis submission: June 21 Final presentation: June 07
Strategy: GDP weighted countries; stock level: max sharpe ratio, min volatility, equal sector weights or quotas, momentum?
EDA
To DO: compute correlations by sector (10 sectors)
# regular correlation matrix of all (four) numeric attributes
cor(select(BRIC.static, where(is.numeric)))
Some conventions:
Characteristic should be calculated as in Hanauer & Lauterbach (2019) or in Hanauer (2020)
Big stocks should be defined as the biggest stocks which together account for 90% of a country’s aggregated market capitalization Benchmark should be defined as the cap-weighted universe of big stocks Returns should be in USD Breakpoints (for Fama-French factors) should be calculated on big stocks (as in the excursus) but both small and big stocks go into the factor calculation.
#5.STRATEGY - Multifactor Portfolio sort Momentum sorting is done during #2. ##5.1 Value Breakpoints
BRIC.strategy <- BRIC.maindata
colnames(BRIC.strategy)
[1] "hcjun" "Id" "country.x" "Date" "MV" "MV.USD"
[7] "LMV.USD" "MV.USD.June" "RET" "RET.USD" "volatility" "pf.momentum"
[13] "ym" "year" "month" "RF" "RiRF" "RiRF.local"
[19] "country.y" "ICBSUC" "YEAR" "BookValue" "OperatingProfits" "TotalAssets"
[25] "TotalAssetsBefore" "WC02999" "BM" "OPBE" "AssetChange" "pf.size"
[31] "bm_bb30" "bm_bb70" "pf.bm" "SIZE_VALUE" "opbe_bb30" "opbe_bb70"
[37] "pf.opbe" "SIZE_PROFITABILITY" "ac_bb30" "ac_bb70" "pf.ac" "SIZE_INVESTMENT"
## Value factor
## creating a large cap and high B/M ratio column (large cap value)
# subsetting the large cap stocks only (rebalanced yearly)
BRIC.strategy <- subset(BRIC.strategy, pf.size == "Big" )
# subsetting only value stocks (cutoff = median) Look up CUTOFF details
# Determine the value breakpoints
hlpvariable2 <- BRIC.maindata[month==7, .(median = quantile(BM , probs = c(0.5), na.rm=T)),by=year]
# Merge the value portfolio allocation back from July Y to June Y+1
BRIC.strategy <- merge(BRIC.strategy,hlpvariable2,
by.x=c("hcjun"),
by.y=c("year"),
all.x=T)
BRIC.strategy[ , pf.value := ifelse(BM>median,"Value",(ifelse(BM<=median,"Growth",NA)))]
table(BRIC.strategy$pf.value)
Growth Value
148039 74405
dim(BRIC.strategy)
[1] 222444 44
##5.2 Low Vol Strategy implementation on the balanced panel data (Multifactor etf)
# Determine the low_vol breakpoints based on big stocks only
hlpvariable2 <- BRIC.strategy[month==7, .(lower_20 = quantile(volatility , probs = c(0.2), na.rm=T)),by=year]
# Merge the low_vol portfolio allocation back from July Y to June Y+1
BRIC.strategy <- merge(BRIC.strategy,hlpvariable2,
by.x=c("hcjun"),
by.y=c("year"),
all.x=T)
BRIC.strategy[ , pf.low_vol := ifelse(volatility>lower_20,"HighVol",(ifelse(volatility<=lower_20,"LowVol",NA)))]
table(BRIC.strategy$pf.low_vol)
HighVol LowVol
175127 47317
dim(BRIC.strategy)
[1] 222444 46
##5.3 Filtering for strategy requirements
# filtering for growth stocks
BRIC.strategy <- subset(BRIC.strategy, pf.value == "Growth" )
# filtering for lowVol stocks
BRIC.strategy <- subset(BRIC.strategy, pf.low_vol == "LowVol" )
# filtering for winner stocks
BRIC.strategy <- subset(BRIC.strategy, pf.momentum == "Winner" )
dim(BRIC.strategy)
summary(BRIC.strategy)
strat.EqualWeights <- aggregate(Id ~ ym, data = BRIC.strategy, FUN=function(x) length(unique(x)))
colnames(strat.EqualWeights)[2] <- "NumberOfStocks"
# average number of stocks
strategy.avgNumberOfStocks <- sum(strat.EqualWeights$NumberOfStocks)/length(strat.EqualWeights$NumberOfStocks)
bench.EqualWeights <- aggregate(Id ~ ym, data = BRIC.benchmark, FUN=function(x) length(unique(x)))
colnames(bench.EqualWeights)[2] <- "NumberOfStocks"
# average number of stocks
benchmark.avgNumberOfStocks <- sum(bench.EqualWeights$NumberOfStocks)/length(bench.EqualWeights$NumberOfStocks)
#6 Portfolio Statistics All teams should perform the following analyses:
NOTE RUN only one of the 6.0.x junks ### 6.1.1 Equal weights // Every stock has the same weight within a ym
# assign a new working variable
strategy <- BRIC.strategy
strategy.EqualWeights <- aggregate(Id ~ ym, data = strategy, FUN=function(x) length(unique(x)))
colnames(strategy.EqualWeights)[2] <- "NumberOfStocks"
## join value weights to stocks
strategy <- inner_join(x = strategy, y = strategy.EqualWeights, by = "ym")
## calculate value weight
strategy$Weight <- 1/strategy$NumberOfStocks
## calculate value weight excess return / return
strategy$wRet <- strategy$Weight*strategy$RET.USD
strategy$wExRet <- strategy$Weight * strategy$RiRF
# TOP 10 ----
strategy.EqualWeights$weight <- 1/strat.EqualWeights$NumberOfStocks
strategy.topTenBricT <- mean(strategy.EqualWeights$weight)
// every stock has a weight according to its marketcap in the ym
# assign a new working variable
strategy <- BRIC.strategy
# value weight returns
## calculate monthly market value over all stocks
strategy.valueWeights <- aggregate(LMV.USD ~ ym, data = strategy, FUN = sum)
colnames(strategy.valueWeights)[2] <- "TotalValue"
## join value weights to stocks
strategy <- inner_join(x = strategy, y = strategy.valueWeights, by = "ym")
## calculate value weight
strategy$Weight <- strategy$LMV.USD/strategy$TotalValue
## calculate value weight excess return / return
strategy$wRet <- strategy$Weight*strategy$RET.USD
strategy$wExRet <- strategy$Weight * strategy$RiRF
# TOP 10 ----
strategy_topTen <- strategy %>% arrange(desc(Weight)) %>% group_by(ym) %>% top_n(wt=Weight,10)
strategy.topTenBricY <- aggregate(Weight ~ ym, data = strategy_topTen, FUN = sum)
strategy.topTenBricT <- mean(strategy.topTenBricY$Weight)
# RETURNS ----
# per month
strategy.retBricM <- aggregate(cbind(wRet,wExRet) ~ ym + hcjun, data = strategy,FUN = sum)
# per year
strategy.retBricY <- aggregate(cbind(wRet,wExRet) ~ hcjun, data = strategy.retBricM,FUN = mean)
# annualisation
strategy.retBricY$wRet <- strategy.retBricY$wRet * 12
strategy.retBricY$wExRet <- strategy.retBricY$wExRet * 12
# mean return of whole period
strategy.retBricT <- mean(strategy.retBricY$wRet) # 15.329091 %
# mean excess return of whole period
strategy.retExBricT <- mean(strategy.retBricY$wExRet) # 13.111509 %
# STANDARD DEVIATION ----
# we calculate the sd out of the annualised portfolio returns
strategy.sdBricT <- sd(strategy.retBricY$wRet) # 37.155688
# SHARP RATIO ----
strategy.srBricT <- strategy.retExBricT/strategy.sdBricT # 0.352880
# MAX DRAWDOWN ----
drawdown <- function(ret) {
cum.ret <- c(0, cumsum(ret))
drawdown <- cum.ret - cummax(cum.ret)
return(tail(drawdown, -1))
}
maxdrawdown <- function(ret)min(drawdown(ret))
ret <- strategy.retBricY$wRet
strategy.mdBricT <- maxdrawdown(ret) # -99.1467
# TRACKING ERROR ----
# merge portfolio returns and benchmark returns
strategy_benchmark <- inner_join(x = strategy.retBricY, y = benchmark.retBricY, by = "hcjun")
colnames(strategy_benchmark)[2] <- "wRet_strategy"
colnames(strategy_benchmark)[3] <- "wExRet_strategy"
colnames(strategy_benchmark)[4] <- "wRet_benchmark"
colnames(strategy_benchmark)[5] <- "wExRet_benchmark"
strategy_benchmark$pf_activeReturn <- strategy_benchmark$wRet_strategy - strategy_benchmark$wRet_benchmark
strategy.teBricT <- sd(strategy_benchmark$pf_activeReturn) # 26.831411 %
# INFROMATION RATIO ----
strategy.irBricT <- mean(strategy_benchmark$pf_activeReturn)/strategy.teBricT # 0.26754
IMPORTANT NOTE: If we would like to apply the whole strategy for a country stand-alone we would have to filter in 5.3 for a country and run the “BRIC”-code in part 6. Here we calculate statistics for single countries, but they’re based on a portfolio selection with stocks of the whole BRIC region. –> strong BIAS
# RETURNS
# per month
strategy.retCountryM <- aggregate(cbind(wRet,wExRet) ~ country.x + ym + hcjun, data = strategy,FUN = sum)
# per year
strategy.retCountryY <- aggregate(cbind(wRet,wExRet) ~ country.x + hcjun, data = strategy.retCountryM,FUN = mean)
# annualisation
strategy.retCountryY$wRet <- strategy.retCountryY$wRet * 12
strategy.retCountryY$wExRet <- strategy.retCountryY$wExRet * 12
# mean excess return and return of whole period
strategy.retCountryT <- aggregate(cbind(wRet,wExRet) ~ country.x, data = strategy.retCountryY,FUN = mean)
# STANDARD DEVIATION
strategy.sdCountryT <- aggregate(wRet ~ country.x, data = strategy.retCountryY, FUN = sd)
colnames(strategy.sdCountryT)[2] <- "SDwholeperiod"
# SHARP RATIO
strategy.srCountryT = inner_join(x = strategy.retCountryT, y = strategy.sdCountryT, by = "country.x" )
strategy.srCountryT$SR <- strategy.srCountryT$wExRet/strategy.srCountryT$SDwholeperiod
# MAX DRAWDOWN
# td
# TRACKING ERROR
# merge portfolio returns and benchmark returns
strategy_benchmark_c <- inner_join(x = strategy.retCountryY, y = benchmark.retCountryY, by = c("hcjun","country.x"))
colnames(strategy_benchmark_c)[3] <- "wRet_portfolio"
colnames(strategy_benchmark_c)[4] <- "wExRet_portfolio"
colnames(strategy_benchmark_c)[5] <- "wRet_benchmark"
colnames(strategy_benchmark_c)[6] <- "wExRet_benchmark"
strategy_benchmark_c$pf_activeReturn <- strategy_benchmark_c$wRet_portfolio - strategy_benchmark_c$wRet_benchmark
strategy.teCountryT <- aggregate(pf_activeReturn ~ country.x, data = strategy_benchmark_c, FUN = sd)
colnames(strategy.teCountryT)[2] <- "TrackingError"
# INFROMATION RATIO
strategy.irCountryT_intermediate <- aggregate(pf_activeReturn ~ country.x, data = strategy_benchmark_c, FUN = mean)
colnames(strategy.irCountryT_intermediate)[2] <- "meanActiveRet"
strategy.irCountryT <- inner_join(x = strategy.teCountryT, y = strategy.irCountryT_intermediate, by = "country.x")
strategy.irCountryT$InformationRatio <- strategy.irCountryT$meanActiveRet / strategy.irCountryT$TrackingError
# mean return of whole period
benchmark.retBricT <- mean(benchmark.retBricY$wRet)
# mean excess return of whole period
benchmark.retExBricT <- mean(benchmark.retBricY$wExRet)
# STANDARD DEVIATION
# we calculate the sd out of the annualised portfolio returns
benchmark.sdBricT <- sd(benchmark.retBricY$wRet)
# SHARP RATIO
benchmark.srBricT <- benchmark.retExBricT/benchmark.sdBricT
# MAX DRAWDOWN
drawdown <- function(ret) {
cum.ret <- c(0, cumsum(ret))
drawdown <- cum.ret - cummax(cum.ret)
return(tail(drawdown, -1))
}
maxdrawdown <- function(ret)min(drawdown(ret))
ret <- benchmark.retBricY$wRet
benchmark.mdBricT <- maxdrawdown(ret)
# TRACKING ERROR
# merge portfolio returns and benchmark returns
benchmark_benchmark <- inner_join(x = benchmark.retBricY, y = benchmark.retBricY, by = "hcjun")
colnames(benchmark_benchmark)[2] <- "wRet_benchmark"
colnames(benchmark_benchmark)[3] <- "wExRet_benchmark"
colnames(benchmark_benchmark)[4] <- "wRet_benchmark"
colnames(benchmark_benchmark)[5] <- "wExRet_benchmark"
benchmark_benchmark$pf_activeReturn <- benchmark_benchmark$wRet_benchmark - benchmark_benchmark$wRet_benchmark
benchmark.teBricT <- sd(benchmark_benchmark$pf_activeReturn)
# INFROMATION RATIO
benchmark.irBricT <- mean(benchmark_benchmark$pf_activeReturn)/benchmark.teBricT
# TOP 10 ----
benchmark_topTen <- BRIC.benchmark %>% arrange(desc(ValueWeight)) %>% group_by(ym) %>% top_n(wt=ValueWeight,10)
benchmark.topTenBricY <- aggregate(ValueWeight ~ ym, data = benchmark_topTen, FUN = sum)
benchmark.topTenBricT <- mean(benchmark.topTenBricY$ValueWeight)
library(reshape)
library(xts)
# strategy turnover ----
# get intermediate data frame
turnover_calc <- strategy
s_weights <- subset(turnover_calc,select= c("ym","Weight","Id"))
s_weights <-reshape(s_weights, idvar = "ym", timevar = "Id", direction = "wide")
s_weights <-as.xts(s_weights)
s_weights[is.na(s_weights)]=0
s_lead_weights <- as.data.frame(s_weights)
s_lead_weights <-s_lead_weights %>% mutate_all(lead)
as.data.frame(s_lead_weights)
s_lead_weights<-as.data.frame(s_lead_weights)
s_weights<-as.data.frame(s_weights)
s_turnover_weights<-as.data.frame(txns <- s_lead_weights - s_weights) #s_tunover_weights - stuff inside the bracker in the fromula
s_turnover_weights[is.na(s_turnover_weights)]=0
s_turnover_pattern <- as.data.frame(rowSums(abs(s_turnover_weights[,1:length(s_turnover_weights)])),order.by=index(s_turnover_weights))
colnames(s_turnover_pattern)[1] <- "one"
s_turnover_pattern<-as.data.frame(s_turnover_pattern)
strategy.turnover <- (sum(s_turnover_pattern$one) / (2*nrow(s_turnover_pattern)))
# benchmark turnover ----
# get intermediate data frame
turnover_calc <- BRIC.benchmark
s_weights <- subset(turnover_calc,select= c("ym","ValueWeight","Id"))
s_weights <-reshape(s_weights, idvar = "ym", timevar = "Id", direction = "wide")
s_weights <-as.xts(s_weights)
s_weights[is.na(s_weights)]=0
s_lead_weights <- as.data.frame(s_weights)
s_lead_weights <-s_lead_weights %>% mutate_all(lead)
as.data.frame(s_lead_weights)
s_lead_weights<-as.data.frame(s_lead_weights)
s_weights<-as.data.frame(s_weights)
s_turnover_weights<-as.data.frame(txns <- s_lead_weights - s_weights) #s_tunover_weights - stuff inside the bracker in the fromula
s_turnover_weights[is.na(s_turnover_weights)]=0
s_turnover_pattern <- as.data.frame(rowSums(abs(s_turnover_weights[,1:length(s_turnover_weights)])),order.by=index(s_turnover_weights))
colnames(s_turnover_pattern)[1] <- "one"
s_turnover_pattern<-as.data.frame(s_turnover_pattern)
benchmark.turnover <- (sum(s_turnover_pattern$one) / (2*nrow(s_turnover_pattern)))
full_BRIC_ValueWeight <- c(strategy.retBricT,strategy.retExBricT,strategy.sdBricT,strategy.srBricT,strategy.mdBricT,strategy.teBricT,strategy.irBricT, strategy.avgNumberOfStocks, strategy.topTenBricT, strategy.turnover)
full_BRIC_Benchmark <- c(benchmark.retBricT,benchmark.retExBricT,benchmark.sdBricT,benchmark.srBricT,benchmark.mdBricT,benchmark.teBricT,benchmark.irBricT, benchmark.avgNumberOfStocks, benchmark.topTenBricT, benchmark.turnover)
full_BRIC_ValueWeight
# join factors, strategy and benchmark ----
spanning_1 <- left_join(x = factors, y = strategy.retBricM, by = "ym")
spanning <- left_join(x = spanning_1, y = market.retBricM, by = "ym")
colnames(spanning)[8] <- "Strategy_RET"
colnames(spanning)[9] <- "Strategy_RiRF"
colnames(spanning)[11] <- "Market_RET"
colnames(spanning)[12] <- "Market_RiRF"
# correlation matrix between factors
head(spanning)
dim(spanning) # 280 (24 years * 12 months) x 5
## cor(spanning[,-"ym"]) # correlation matrix between factors for the BRIC region
summary(spanning) # to use for a table
# Spanning Tests ----
## FF3FM
summary(lm(data=spanning, formula = Strategy_RiRF ~ Market_RiRF + SMB + HML))
## FF5FM
summary(lm(data=spanning, formula = Strategy_RiRF ~ Market_RiRF + SMB + HML + CMA + RMW))
## FF3FM + MOM
summary(lm(data=spanning, formula = Strategy_RiRF ~ Market_RiRF + SMB + HML + MOM))
## FF5FM + MOM
summary(lm(data=spanning, formula = Strategy_RiRF ~ Market_RiRF + SMB + HML + CMA + RMW + MOM))
List of plots: 1) Cummulative performance Benchmark vs Strategy BRIC // VINIT/MANU revisit 2) Single factors vs multifactor portfolio in risk-return space // MANU revisit 3) Evolution of MarketCap weights per country –> regional weighting (implicit) // ?? // use GDP plot code and update it
List of tables: 1) Performance and risk // MANU revisit –> JOHANNES gives the data 2) Investability // MANU revisit 3) Spanning tests_Style exposure //
Table/Plot: 1) Strategy BRIC vs Countries stand alone // JOHANNES ## Tables done 2) Strategy parts stand alone and/or 2 combined vs 3ple sort // Johannes ## Tables done